## countingquery_solution.r
## Vito D'Orazio
## June 16, 2014
## This script shows four ways a system can respond to a counting query, some of which are differentially private.


library(VGAM)

## Load the PUMS extract used by every query below.
## The workspace is deliberately NOT cleared and the working directory is
## NOT changed: both are session-wide side effects that surprise anyone who
## sources this script from their own session.
data_dir <- "/Users/vjdorazio/Desktop/IQSS/privacy_tools/R_workshop"
if (!dir.exists(data_dir)) {
  # Workshop folder not present on this machine: look for the CSV in the
  # current working directory instead.
  data_dir <- "."
}

mydata <- read.csv(file.path(data_dir, "PUMS5extract.csv"))

## query: how many individuals are married?

## Adam's first solution
## Exact, unperturbed answer: the number of rows whose `married` value is 1.
## Rows where `married` is NA are not counted.
sol1 <- sum(mydata$married == 1, na.rm = TRUE)


## Adam's second solution
## Add a single draw of Laplace noise (centred at 0, scale 10) to the exact
## count from the first solution. rlaplace() is provided by VGAM.
sol2 <- sol1 + rlaplace(n = 1, location = 0, scale = 10)

# can we do this same thing using location in rlaplace()?


## Adam's third solution
## Subsample the data: every row is kept independently with probability
## 100 / nrow(mydata), so the sample size is random (about 100 on average).
myprob <- 100 / nrow(mydata)
mydata$flag <- rbinom(n = nrow(mydata), size = 1, prob = myprob)
t <- mydata[mydata$flag %in% 1, ]
## Number of married individuals inside the subsample only; the count is
## not rescaled to the size of the full data set.
sol3 <- sum(t$married == 1, na.rm = TRUE)

# increase the sample size for a better approximation
# Same subsampling procedure, now keeping each row with probability
# 10000 / nrow(mydata). This overwrites myprob, mydata$flag, t and sol3.
myprob <- 10000 / nrow(mydata)
mydata$flag <- rbinom(n = nrow(mydata), size = 1, prob = myprob)
t <- mydata[mydata$flag %in% 1, ]
# Married count within the new subsample (still not rescaled).
sol3 <- sum(t$married == 1, na.rm = TRUE)


## Adam's fourth solution
## Randomized response: every individual reports a noisy bit q.
##   married == 1  ->  q = 1 with probability 0.5 + epsilon
##   married == 0  ->  q = 1 with probability 0.5 - epsilon
## Rows whose `married` value is neither 0 nor 1 (including NA) keep q = 0.
epsilon <- 0.1
mydata$q <- 0

# Each individual needs an independent coin flip, so `n` must equal the
# number of rows in the group. With n = 1, rbinom() returns a single value
# that R recycles across the whole group, giving every married row the same
# response (and likewise every unmarried row).
mydata$q[which(mydata$married == 1)] <- rbinom(
  n = sum(mydata$married == 1, na.rm = TRUE),
  size = 1,
  prob = 0.5 + epsilon
)
mydata$q[which(mydata$married == 0)] <- rbinom(
  n = sum(mydata$married == 0, na.rm = TRUE),
  size = 1,
  prob = 0.5 - epsilon
)

# If every row has married in {0, 1}, the married count can be estimated as
# (sum(mydata$q) - nrow(mydata) * (0.5 - epsilon)) / (2 * epsilon).

